package is2.tag;



import is2.data.F2SF;
import is2.data.FV;
import is2.data.Instances;
import is2.data.InstancesTagger;
import is2.data.Long2Int;
import is2.data.Long2IntInterface;
import is2.data.ParametersFloat;
import is2.data.PipeGen;
import is2.data.SentenceData09;
import is2.io.CONLLReader09;
import is2.io.CONLLWriter09;
import is2.tools.IPipe;
import is2.tools.Tool;
import is2.tools.Train;
import is2.util.DB;
import is2.util.Evaluator;
import is2.util.OptionsSuper;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map.Entry;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;


/**
 * Part-of-speech tagger. Can be run from the command line ({@link #main}) to
 * train a model, tag a CoNLL-09 file and/or evaluate the output, or used
 * programmatically through the {@code tag(...)} methods and {@link #apply}.
 */
public class Tagger implements Tool, Train {

	/** Feature extractor; created in {@link #main} for training and in {@link #readModel} otherwise. */
	public ExtractorT2 pipe;
	/** Model weights; allocated in {@link #main} for training or read from the model file. */
	public ParametersFloat params;
	/** Maps a long feature code to an int index (see the {@code l2i} calls below). */
	public Long2IntInterface li;
	/** Feature registry handed to the extractor and to each {@link InstancesTagger}. */
	public MFO mf;
	private OptionsSuper _options;

	/**
	 * Creates a tagger and loads the model named by the options.
	 *
	 * @param options options carrying the model file name
	 */
	public Tagger (Options options) {

		// readModel already reports its own failures; this guard only makes
		// sure construction itself never throws.
		try {
			readModel(options);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/** Creates an empty tagger; {@link #main} fills in the fields before use. */
	public Tagger() {	}

	/**
	 * @param modelFileName the file name of the model
	 */
	public Tagger(String modelFileName) {
		this(new Options(new String[]{"-model",modelFileName}));
	}

	/**
	 * Command line entry point. Depending on the parsed options this trains and
	 * writes a model, tags the test file, and/or evaluates the output file
	 * against the gold file; finally the elapsed time is printed.
	 *
	 * @param args command line arguments, parsed by {@link Options}
	 */
	public static void main (String[] args) throws FileNotFoundException, Exception
	{

		long start = System.currentTimeMillis();
		Options options = new Options(args);

		Tagger tagger = new Tagger();

		if (options.train) {

			tagger.li = new Long2Int(options.hsize);
			tagger.pipe =  new ExtractorT2 (options, tagger.mf= new MFO());

			InstancesTagger is = (InstancesTagger)tagger.pipe.createInstances(options.trainfile);

			// sized only after the instances exist, from the current size of the mapping
			tagger.params = new ParametersFloat(tagger.li.size());

			tagger.train(options, tagger.pipe,tagger.params,is);
			tagger.writeModel(options, tagger.pipe, tagger.params);

		}

		if (options.test) {

			tagger.readModel(options);

			tagger.out(options,tagger.pipe, tagger.params);
		}

		System.out.println();

		if (options.eval) {
			System.out.println("\nEVALUATION PERFORMANCE:");
			Evaluator.evaluateTagger(options.goldfile, options.outfile,options.format);
		}
		long end = System.currentTimeMillis();
		System.out.println("used time "+((float)((end-start)/100)/10));
	}

	/**
	 * Loads the model from {@code options.modelName}: the feature registry, the
	 * parameters and the extractor data are read, in that order, from the first
	 * entry of the zip file. Afterwards {@code pipe.types} is rebuilt so that a
	 * tag index maps back to its tag string.
	 * <p>
	 * Failures are printed to stderr and not rethrown; the tagger may then be
	 * only partially initialised.
	 *
	 * @param options options carrying the model file name
	 */
	public void readModel(OptionsSuper options)  {

		try{
			pipe = new ExtractorT2(options, mf =new MFO());
			_options=options;

			ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName)));
			try {
				zis.getNextEntry();
				DataInputStream dis = new DataInputStream(new BufferedInputStream(zis));

				pipe.mf.read(dis);
				pipe.initValues();
				pipe.initFeatures();

				params = new ParametersFloat(0);
				params.read(dis);
				li = new Long2Int(params.parameters.length);
				pipe.read(dis);
			} finally {
				// closing the zip stream also releases the underlying file,
				// even when one of the read steps above failed
				zis.close();
			}

			// invert the tag -> index map into an index -> tag array
			pipe.types = new String[pipe.mf.getFeatureCounter().get(ExtractorT2.POS)];
			for(Entry<String,Integer> e : pipe.mf.getFeatureSet().get(ExtractorT2.POS).entrySet())
				pipe.types[e.getValue()] = e.getKey();

			DB.println("Loading data finished. ");
		} catch(Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Trains the tagger. For every token the tag with the highest score is
	 * determined; when it differs from the gold tag, the parameters are updated
	 * with the predicted and the gold feature vector. After the last iteration
	 * {@code params.average(...)} is called.
	 * <p>
	 * Feature extraction uses {@code this.pipe}, not the {@code pipe} argument.
	 *
	 * @param options training options ({@code numIters}, {@code noLemmas})
	 * @param pipe not used by this implementation
	 * @param params the parameters to be trained
	 * @param is2 the training instances; must be an {@link InstancesTagger}
	 */
	public void train(OptionsSuper options, IPipe pipe, ParametersFloat params, Instances is2) {

		InstancesTagger is = (InstancesTagger)is2;
		String wds[] = mf.reverse(this.pipe.mf.getFeatureSet().get(ExtractorT2.WORD));

		int del=0;
		F2SF f = new F2SF(params.parameters);
		long vs[] = new long[ExtractorT2._MAX];

		int types =this.pipe.types.length;

		// counts down by one per sentence and is passed to params.update(...)
		double upd = options.numIters*is.size() +1;

		for(int i = 0; i <options.numIters ; i++) {

			long start = System.currentTimeMillis();

			int numInstances = is.size();

			long last= System.currentTimeMillis();
			FV pred = new FV(),gold = new FV();

			int correct =0,count=0;
			System.out.print("Iteration "+i+": ");

			for(int n = 0; n < numInstances; n++) {

				if((n+1) % 500 == 0) del= PipeGen.outValueErr(n+1, (count-correct),(float)correct/(float)count,del,last,upd);

				int length = is.length(n);

				upd--;

				// the lemma row does not depend on the token, so resolve it once per sentence
				int[] lemmas = options.noLemmas ? new int[length] : is.plemmas[n];

				for(int w = 1; w < length; w++) {

					double best  = -1000;
					short bestType = -1;

					this.pipe.addFeatures(is,n,wds[is.forms[n][w]],w,is.gpos[n],is.forms[n], lemmas, vs);

					for(short t=0;t<types;t++) {

						// the hypothesis of a part-of-speech tag
						// NOTE(review): the shift is evaluated in int width before widening;
						// kept as is so feature codes stay compatible with existing models
						long p = t<<ExtractorT2.s_type;
						f.clear();

						// add the features to the vector; Integer.MIN_VALUE ends the list
						for(int k1=0;vs[k1]!=Integer.MIN_VALUE;k1++) {
							if (vs[k1]>0) f.add(this.li.l2i(vs[k1]|p));
						}

						if (f.score > best) {
							bestType=t;
							best =f.score;
						}
					}

					count++;
					if (bestType == is.gpos[n][w] ) {
						correct++;
						continue;
					}

					// wrong prediction: update with predicted versus gold features
					pred.clear();
					fillTypedVector(pred, vs, bestType<<ExtractorT2.s_type);

					gold.clear();
					fillTypedVector(gold, vs, is.gpos[n][w]<<ExtractorT2.s_type);

					params.update(pred,gold, (float)upd, 1.0F);
				}
			}

			long end = System.currentTimeMillis();
			String info = "time "+(end-start);
			PipeGen.outValueErr(numInstances, (count-correct),(float)correct/(float)count,del,last,upd,info);
			System.out.println();
			del=0;
		}

		params.average(options.numIters*is.size());

	}

	/**
	 * Adds every positive feature code of {@code vs}, combined with the given
	 * tag bits, to {@code target}. The list ends at the first
	 * {@code Integer.MIN_VALUE} entry.
	 */
	private void fillTypedVector(FV target, long[] vs, long typeBits) {
		for (int k = 0; vs[k] != Integer.MIN_VALUE; k++) {
			if (vs[k] > 0) target.add(this.li.l2i(vs[k] | typeBits));
		}
	}


	/**
	 * Tags all sentences of {@code options.testfile} and writes them to
	 * {@code options.outfile}; progress and timing are printed to stdout.
	 * Failures are printed to stderr and not rethrown.
	 *
	 * @param options options carrying the test and output file names
	 * @param pipe the pipe whose {@code initValues()} is called before tagging
	 * @param params not used by this implementation; tagging uses the field {@link #params}
	 */
	public void out (OptionsSuper options, IPipe pipe, ParametersFloat params) {

		try {

			long start = System.currentTimeMillis();
			// NOTE(review): an earlier variant built the reader with
			// CONLLReader09.NO_NORMALIZE and was marked to be restored - confirm which one is intended
			CONLLReader09 depReader = new CONLLReader09(options.testfile);

			CONLLWriter09 depWriter = new CONLLWriter09(options.outfile);

			int cnt = 0;
			try {
				System.out.print("Processing Sentence: ");
				pipe.initValues();

				int del=0;
				while(true) {

					InstancesTagger is = new InstancesTagger();
					is.init(1, mf);
					SentenceData09 instance = depReader.getNext(is);
					if (instance == null || instance.forms == null)	 break;

					is.fillChars(instance, 0, ExtractorT2._CEND);

					cnt++;

					tag(is, instance);

					SentenceData09 i09 = new SentenceData09(instance);
					i09.createSemantic(instance);
					depWriter.write(i09);

					if(cnt % 100 == 0) del=PipeGen.outValue(cnt, del);

				}
				PipeGen.outValue(cnt, del);
			} finally {
				// finish the output even when reading or tagging failed half way
				depWriter.finishWriting();
			}

			long end = System.currentTimeMillis();
			System.out.println(PipeGen.getSecondsPerInstnace(cnt,(end-start)));
			System.out.println(PipeGen.getUsedTime(end-start));
		} catch(Exception e) {
			e.printStackTrace();
		}
	}


	/**
	 * Tags a sentence in place.
	 *
	 * @param instance the sentence; its {@code ppos} array is overwritten
	 * @return the same {@code instance} object
	 */
	public SentenceData09 tag(SentenceData09 instance){
		InstancesTagger is = new InstancesTagger();
		is.init(1, pipe.mf);
		new CONLLReader09().insert(is, instance);
		is.fillChars(instance, 0, ExtractorT2._CEND);
		tag(is, instance);

		return instance;
	}


	/**
	 * Fills {@code instance.ppos}. Position 0 receives the root tag; all other
	 * positions are predicted in two identical passes, so the second pass is
	 * run with the tags of the first pass already present in {@code pos}.
	 */
	private void tag(InstancesTagger is, SentenceData09 instance) {

		int length = instance.ppos.length;

		// Sized by ppos rather than gpos: only indices below ppos.length are
		// written here, and tag(String[], String[]) never assigns instance.gpos.
		short[] pos = new short[length];

		float sc[] =new float[length];

		instance.ppos[0]= is2.io.CONLLReader09.ROOT_POS;
		pos[0]=(short)pipe.mf.getValue(ExtractorT2.POS, is2.io.CONLLReader09.ROOT_POS);

		for(int pass = 0; pass < 2; pass++) {
			for(int j = 1; j < length; j++) {

				short bestType = (short)pipe.fillFeatureVectorsOne(instance.forms[j],params, j, is,0,pos,this.li,sc);
				pos[j] = bestType;
				instance.ppos[j]= pipe.types[bestType];
			}
		}
	}

	/**
	 * Tag a single word and return a n-best list of Part-of-Speech tags.
	 *
	 * @param is set of sentences
	 * @param instanceIndex index to the sentence in question
	 * @param word index of the word to be tagged
	 * @param wordForm the form of the word
	 * @return n-best list of Part-of-Speech tags
	 */
	public ArrayList<POS> tag(InstancesTagger is,int instanceIndex, int word, String wordForm) {

		return  pipe.classify( wordForm , params, word, is, instanceIndex, is.pposs[instanceIndex], li);

	}

	/**
	 * Same as {@link #tag(InstancesTagger, int, int, String)}, but returns the
	 * tags as strings.
	 *
	 * @param is set of sentences
	 * @param instanceIndex index to the sentence in question
	 * @param word index of the word to be tagged
	 * @param wordForm the form of the word
	 * @return the tag strings in the order of the n-best list; never {@code null}.
	 *         An entry that cannot be resolved is reported on stderr and skipped.
	 */
	public ArrayList<String> tagStrings(InstancesTagger is,int instanceIndex, int word, String wordForm) {

		ArrayList<POS> plist  = tag(is, instanceIndex, word, wordForm);
		String pos[] = mf.reverse(this.pipe.mf.getFeatureSet().get(ExtractorT2.POS));

		// must be a real list: adding to a null reference failed for every entry
		ArrayList<String> postags = new ArrayList<String>();
		for(POS p : plist) {
			try {
				 postags.add(pos[p.p]);
			}catch(Exception e) {
				e.printStackTrace();
			}
		}
		return postags;

	}



	/**
	 * Tags a tokenised sentence.
	 *
	 * @param words the word forms, without a root token
	 * @param lemmas the lemmas; {@code lemmas[j]} is read for every index of {@code words}
	 * @return an array with one tag per word. If tagging fails, the exception is
	 *         printed to stderr and the entries not yet filled remain {@code null}.
	 */
	public  String[] tag (String[] words, String[] lemmas)  {

		String[] pposs = new String[words.length];

		try {
			pipe.initValues();

			// one extra position in front for the artificial root token
			int length = words.length+1;

			InstancesTagger is = new InstancesTagger();
			is.init(1, pipe.mf);
			is.createInstance09(length);

			SentenceData09 instance = new SentenceData09();
			instance.forms = new String[length];
			instance.forms[0]=is2.io.CONLLReader09.ROOT;

			instance.plemmas = new String[length];
			instance.plemmas[0]=is2.io.CONLLReader09.ROOT_LEMMA;

			for(int j = 0; j < words.length; j++) {
				instance.forms[j+1]=words[j];
				instance.plemmas[j+1]=lemmas[j];
			}

			for(int j = 0; j < length; j++) {
				is.setForm(0, j, instance.forms[j]);
				is.setLemma(0, j, instance.plemmas[j]);
			}

			instance.ppos = new String[length];

			is.fillChars(instance, 0, ExtractorT2._CEND);

			this.tag(is, instance);

			// drop the root position again
			for(int j = 0; j < words.length; j++) {
				pposs[j] = instance.ppos[j+1];
			}
		} catch(Exception e) {
			e.printStackTrace();
		}

		return pposs;

	}

	/* (non-Javadoc)
	 * @see is2.tools.Tool#apply(is2.data.SentenceData09)
	 */
	@Override
	public SentenceData09 apply(SentenceData09 snt) {

		// tag a copy that carries a root token, then return a fresh sentence built from it
		SentenceData09 it = new SentenceData09();
		it.createWithRoot(snt);
		it = tag(it);
		SentenceData09 i09 = new SentenceData09(it);
		i09.createSemantic(it);
		return i09;
	}



	/* (non-Javadoc)
	 * @see is2.tools.Train#writeModel(is2.util.OptionsSuper, is2.mtag2.Pipe, is2.data.ParametersFloat)
	 */
	@Override
	public void writeModel(OptionsSuper options, IPipe pipe, is2.data.ParametersFloat params) {
		try{
			ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(options.modelName)));
			try {
				zos.putNextEntry(new ZipEntry("data"));
				DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos));

				// same order as readModel expects: registry, parameters, pipe data
				this.pipe.mf.writeData(dos);

				DB.println("number of parameters "+params.parameters.length);
				dos.flush();

				params.write(dos);
				pipe.write(dos);
				dos.flush();
			} finally {
				// completes the zip entry and releases the file, also after a failure
				zos.close();
			}

		} catch(Exception e) {
			e.printStackTrace();
		}
	}

}
